package cn.edu.bjtu.io;

import java.io.IOException;
import java.util.List;

import org.datavec.api.records.reader.impl.LineRecordReader;
import org.datavec.api.writable.Writable;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.sentenceiterator.SentencePreProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cn.edu.bjtu.core.SentenceDetect;
/*
 * 2017/5/26
 */
/**
 * A {@link LineRecordReader} that can also be consumed as a
 * {@link SentenceIterator}: every record (one line of input) is exposed as a
 * sentence.
 *
 * <p>Lines rejected by {@code SentenceDetect.get().isFullfield(String)} are
 * skipped while more input is available. An optional
 * {@link SentencePreProcessor} is applied to each sentence before it is
 * returned.
 */
public class LineSentenceRecordReader extends LineRecordReader implements SentenceIterator{
	private static final long serialVersionUID = 1L;
	protected Logger log = LoggerFactory.getLogger(this.getClass());
	/** Optional transformation applied to every returned sentence; {@code null} means none. */
	private SentencePreProcessor preProcessor;

	/**
	 * Returns the next line accepted by {@link #canHandle(String)}, skipping
	 * rejected lines as long as the reader has more records.
	 *
	 * <p>If the input runs out before an accepted line is found, the last
	 * line read is returned anyway and a warning is logged, so callers
	 * always get a non-null value. As in the underlying reader, the caller
	 * is expected to check {@code hasNext()} before calling this method.
	 *
	 * @return the selected line, passed through the configured
	 *         pre-processor when one is set
	 */
	@Override
	public String nextSentence() {
		List<Writable> l = this.next();
		String line = l.get(0).toString();
		// Keep reading until a line passes the check or the input is exhausted.
		while (!canHandle(line) && hasNext()) {
			List<Writable> temp = next();
			line = temp.get(0).toString();
		}
		if (!canHandle(line)) {
			// Input ended on a rejected line: report it, but still return it.
			log.warn("error foramt at {},of split {} ,content {}",
					this.lineIndex, this.splitIndex, line);
		}
		// Honour the pre-processor contract of SentenceIterator.
		return preProcessor != null ? preProcessor.preProcess(line) : line;
	}

	/**
	 * Releases the underlying reader. A failure while closing is logged
	 * rather than propagated, because this method cannot throw a checked
	 * exception.
	 */
	@Override
	public void finish() {
		try {
			super.close();
		} catch (IOException e) {
			log.error("Failed to close the underlying line record reader", e);
		}
	}

	/**
	 * @return the pre-processor applied to each sentence, or {@code null}
	 *         if none has been set
	 */
	@Override
	public SentencePreProcessor getPreProcessor() {
		return preProcessor;
	}

	/**
	 * Sets the pre-processor applied to every sentence returned by
	 * {@link #nextSentence()}.
	 *
	 * @param preProcessor the pre-processor to use, or {@code null} to
	 *        return lines unmodified
	 */
	@Override
	public void setPreProcessor(SentencePreProcessor preProcessor) {
		this.preProcessor = preProcessor;
	}

	/**
	 * Delegates the acceptance decision for a line to {@link SentenceDetect}.
	 *
	 * @param strName the raw line read from the input
	 * @return {@code true} if {@code SentenceDetect} accepts the line
	 */
	private boolean canHandle(String strName) {
		return SentenceDetect.get().isFullfield(strName);
	}
}
